{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Execute this cell to install dependencies\n",
    "%pip install sf-hamilton[visualization] pandas scikit-learn numpy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MPG Simple [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimple.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimple.ipynb)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "f7ca0a2e-99c4-49de-af45-c8c4bddf5685",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "from hamilton import driver\n",
    "from IPython.display import HTML, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "45fcd1cf-5dee-4d3c-b598-823c82654805",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# this will load some jupyter magic to help us\n",
    "%load_ext hamilton.plugins.jupyter_magic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "155ea802-aef6-4d5c-b264-d9ec5b57c733",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "%%cell_to_module -m pipeline --display\n",
    "# when done you can write to file and then load it as a module normally\n",
    "# add -w to write to file  \n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "\n",
    "# Write 4 functions\n",
    "\n",
    "# 1. load/create the pandas dataframe\n",
    "\n",
    "# 2. Create the data_sets\n",
    "\n",
    "# 3. Create the linear model\n",
    "\n",
    "# 4. Evaluate the model\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "17b10355-4e25-4e75-84c5-fd95c0bd3dfb",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# This is a driver and is used to construct the DAG\n",
    "dr = driver.Builder().with_modules(pipeline).build()\n",
    "graph = dr.display_all_functions()\n",
    "display(HTML(graph.pipe(format=\"svg\").decode(\"utf-8\")))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "923eff9a-ce20-484e-a4c7-acfbebb58e16",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# What we'll want to execute/get out of the graph\n",
    "result = dr.execute([\"evaluated_model\", \"linear_model\"])\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "0bba7e14-7076-4277-850e-9d261760dcba",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# Visualize Overrides\n",
    "dr.visualize_execution([\"evaluated_model\"], \n",
    "                           overrides={\"linear_model\": result[\"linear_model\"]})\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "2926af67-421c-467c-b691-1f8763a9afbf",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# execute with overrides\n",
    "dr.execute([\"evaluated_model\"], \n",
    "           overrides={\"linear_model\": result[\"linear_model\"]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "mostRecentlyExecutedCommandWithImplicitDF": {
     "commandId": 2746022128672019,
     "dataframes": [
      "_sqldf"
     ]
    },
    "pythonIndentUnit": 4
   },
   "notebookName": "MPG Simple V1",
   "widgets": {}
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
